# -*- coding: utf-8 -*-
"""
过拟合和欠拟合
Created on Mon Apr  2 20:45:19 2018

@author: Allen
"""
import numpy as np
import matplotlib.pyplot as plt

# Sample 100 inputs uniformly from [-3, 3) and view them as a one-column
# feature matrix, the 2-D layout the estimators below are fitted on.
x = np.random.uniform(low=-3, high=3, size=100)
X = x[:, np.newaxis]

# Targets follow the quadratic 0.5*x^2 + x + 2 plus standard-normal noise.
y = 0.5 * x**2 + x + 2 + np.random.normal(loc=0, scale=1, size=100)

# Show the raw samples before any model is fitted.
plt.scatter(x, y)
plt.show()

# Baseline: fit a straight line to the quadratic data.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Predict once on the training inputs; the fitted model is deterministic, so
# a second identical predict() call would only repeat the same work.
y_predict = lin_reg.predict(X)

# Training-set MSE of the linear fit. One recorded run printed 3.18254562675;
# the data is drawn without a fixed seed, so the value varies between runs.
print(mean_squared_error(y, y_predict))

# 使用多项式回归
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

def PolynomialRegression( degree ):
    """Build an unfitted polynomial-regression pipeline.

    The pipeline expands the inputs into polynomial features of the requested
    degree, standardizes those features, and fits a linear regression on them.

    Parameters
    ----------
    degree : int
        Degree handed to ``PolynomialFeatures``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline with the steps ``"poly"``, ``"std_scaler"`` and ``"lin_reg"``.
    """
    return Pipeline([
        # Use the caller's degree; a hard-coded 2 here would make every call
        # build the same quadratic model regardless of the argument.
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        ("lin_reg", LinearRegression()),
    ])
# Fit the degree-2 pipeline and score it on the same data it was trained on.
poly2_reg = PolynomialRegression(degree=2)
y2_predict = poly2_reg.fit(X, y).predict(X)
# One recorded run printed 0.759930394807; the data is unseeded, so it varies.
print(mean_squared_error(y_true=y, y_pred=y2_predict))

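'''
With degree = 1 the model under-fits the data.
With degree = 100 the training mean squared error is very low, but the fitted
curve is not the one we actually want: this is over-fitting.
'''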